/*************************************************************************
* This code calculates the local demographical controls at the county level
  and these controls are as follows:
      - a.Workforce percentage by  NAICS (Step 6)
      - b.Average age of local residents (Step 1)
      - c.Percentage of people with bachelor degree or higher (Step 2)
      - d.Average education in years (Step 2)
      - e.Percentage of retried population (Step 1)
      - f.Median income (Step 4) 
      - g.Average household duration (Step 3)

* Due to the restructuring of census from 2000 to 2010, above variables will
  be sourced from different places. 
**************************************************************************/

/*************************************************************************/
/************************** Part I: Census 2000 **************************/
/*************************************************************************/
* From rep.cenesus2000_v5, the following variables can be calcualted;
* a, c, d, h;
** industry, education, and tenure are all by blockgroups;
* Extract variables and reduce the dataset by blckgrps;
proc sql;
  create table blkgp
  as select distinct *
  from rep.census2000_v5 (keep = state county tract blkgrp
       H038: P037: nasci:);
  quit;

* Consolidate and calculate at county level;
proc sql;
  create table county2000
  as select distinct state, county,  
            /* Industry*/
            sum(nasci00) as nasci00,
            sum(nasci11) as nasci11,
            sum(nasci21) as nasci21,
            sum(nasci22) as nasci22,
            sum(nasci23) as nasci23,
            sum(nasci31) as nasci31,
            sum(nasci42) as nasci42,
            sum(nasci44) as nasci44,
            sum(nasci48) as nasci48,
            sum(nasci51) as nasci51,
            sum(nasci52) as nasci52,
            sum(nasci53) as nasci53,
            sum(nasci61) as nasci61,
            sum(nasci62) as nasci62,
            sum(nasci71) as nasci71,
            sum(nasci72) as nasci72,
            sum(nasci81) as nasci81,
            sum(nasci92) as nasci92,
            /*Education */
            ( sum(P037015)
             +sum(P037016)
             +sum(P037017)
             +sum(P037018))/sum(P037001) as ratio_clgplus,
            ( sum(P037004)*2
             +sum(P037005)*5.5
             +sum(P037006)*7.5
             +sum(P037007)*9
             +sum(P037008)*10
             +sum(P037009)*11
             +sum(P037010)*12
             +sum(P037011)*12
             +sum(P037012)*12.5
             +sum(P037013)*13.5
             +sum(P037014)*14
             +sum(P037015)*16
             +sum(P037016)*18
             +sum(P037017)*18
             +sum(P037018)*22)/sum(P037001) as mean_edu, 
            /* Household Tenure*/
            ( sum(H038003)*1
             +sum(H038004)*3.5
             +sum(H038005)*8
             +sum(H038006)*15.5
             +sum(H038007)*25.5
             +sum(H038008)*35)/sum(H038001) as mean_tenure
  from blkgp
  group by state, county;

  quit; 
data county2000;
  set county2000;
  year = 2000;
  FIPS = 1000*state+county;
  run; 


/*************************************************************************/
/********************* Part II: Additional files**************************/
/*************************************************************************/
* In this part, we tackle subsequent years and all other variables;

/*************************************************************************/
/**************** Step 1: Average Age and Retirement ********************/
/*** Also calcualte population density *******************/

* Files are downloaded separately for 1990-1999, 2000-2010, and 2010-2017
  source: https://www.census.gov/programs-surveys/popest/data/tables.2000.html;
  
* for each file, convert them to standard style: state/county fips, year, 
  age group, population;

* 1990-1999 files are stored in yearly txt files and are joined into one
  1990_1999 jumbo txt file;
* Import file;
DATA pop1;
  INFILE "C:\Users\User\Dropbox\SASLib\Thesis\census\1990_1999.txt";  
  INPUT
   YEAR  1-2
   FIPS  5-9
   AgeGrp  11-12
   RaceSex  14-14
   Ethenic  16-16
   Population 17-23;
  run; 
proc sort data = pop1; by FIPS year AgeGrp RaceSex Ethenic; run ;
proc sql;
  create table pop1_1
  as select distinct a.fips, 1900+a.year as year, a.agegrp, sum(a.population) as population
  from pop1 as a
  group by a.fips, a.year, a.agegrp;
  quit; 
* To make it consistent with later years, alter two things:
   1. AgeGrp 0 is used to denote total population across all ages
   2. Agegrp 1 is for 0-4 years old;
data pop1_2;
  set pop1_1;
  by Fips year;
  Retain lala;
  if AgeGrp = 0 then lala = population;
  if AgeGrp = 1 then population = population + lala; 
  if AgeGrp = 0 then population = 0; 
  run ;
proc sql;
  create table pop1_3
  as select distinct *, sum(population) as total_pop
  from pop1_2
  group by FIPS, YEAR;
  run; 
data pop1990_1999;
  set pop1_3;
  if AgeGrp = 0 then population = total_pop;
  keep FIPS YEAR AgeGrp Population; 
  run ;

* 2000-2010 file is stored in a single csv file;
* Import;
PROC IMPORT OUT= WORK.pop2
            DATAFILE= "C:\Users\User\Dropbox\SASLib\Thesis\census\2000_2009.csv" 
            DBMS=CSV REPLACE;
     GETNAMES=YES;
     DATAROW=2; 
RUN;
* Convert to the standard style;
%macro popyear;
data temp0;
  set pop2;
  year = 2000;
  rename ESTIMATESBASE2000 = population;
  FIPS = 1000*STATE+COUNTY;
  KEEP FIPS YEAR AGEGRP SEX ESTIMATESBASE2000;
  run; 
%do i = 1 %to 9;
  %let year = %eval(2000+&i);
  data temp&i; 
    set pop2; 
    year=&year; 
    rename POPESTIMATE&year = population
           age = agegrp;
    FIPS = 1000*STATE+COUNTY;
    KEEP FIPS YEAR AGEGRP SEX POPESTIMATE&year;
    run; 
%end; 
data pop2000_2009;
  set temp0-temp9;
  *Since sex = 0 is the total across both sexes;
  if sex =0; 
  keep FIPS YEAR AGEGRP POPULATION;
  run;  
%mend; 
%popyear; 

* 2010-2017 file is also stored in a single csv file;
* Import;
PROC IMPORT OUT= WORK.pop3
            DATAFILE= "C:\Users\User\Dropbox\SASLib\Thesis\census\2010_2017.csv" 
            DBMS=CSV REPLACE;
     GETNAMES=YES;
     DATAROW=2; 
RUN;

data pop2010_2017;
  set pop3; 
  FIPS = state*1000+county;
  if year >= 3;
  year = year+2007;
  rename tot_pop = population;
  keep FIPS YEAR AGEGRP TOT_POP;
  run; 

* Combine all dataset;
data allyears;
  set pop1990_1999 pop2000_2009 pop2010_2017;
  age = (AgeGrp-1)*5 + 2;
  * AgeGrp 0 is total, set age equal to 0 for each of group summation later on;
  if AgeGrp = 0 then age = 0;
  * Assume that average age in 85+ group is 90 years old;
  if AgeGrp = 18 then age = 90; 
  run; 
* Calculate total populationa nd population above 25 years;
proc sql;
  create table pop_total
  as select distinct a.FIPS, a.YEAR, a.population as pop_ct, 
                     sum(b.population) as pop_ct_25plus
  from allyears(where = (agegrp = 0)) as a, allyears(where = (agegrp>=6)) as b
  where a.FIPS = b.FIPS and a.year = b.year
  group by a.FIPS, a.Year;
  quit; 
* Import land areas by county; 
PROC IMPORT OUT= WORK.land2010
            DATAFILE= "C:\Users\User\Dropbox\SASLib\Thesis\census\2010_land_areas.csv" 
            DBMS=CSV REPLACE;
     GETNAMES=YES;
     DATAROW=4; 
RUN;
data land2010;
  set land2010;
  year = 2010;
  keep GCT_STUB_target_geo_id2 SUBHD0303 year;
  rename GCT_STUB_target_geo_id2 = FIPS
         SUBHD0303 = land;
  run; 
PROC IMPORT OUT= WORK.land2000
            DATAFILE= "C:\Users\User\Dropbox\SASLib\Thesis\census\2000_land_areas.csv" 
            DBMS=CSV REPLACE;
     GETNAMES=YES;
     DATAROW=4; 
RUN;
data land2000;
  set land2000;
  year = 2000;
  keep GCT_STUB_target_geo_id2 hc06 year;
  rename GCT_STUB_target_geo_id2 = FIPS
         HC06 = land;
  run; 
** combine two datasets and fill the gap;
data land;
  set land2000;
  output;
  do i = 1 to 10;
    year = 1989+i;
    output;
  end; 
  set land2010;
  output; 
  do i = 1 to 9;
    year = 2000+i;
  output; 
  end; 
  do i = 1 to 8;
    year = 2010+i;
  output; 
  end; 
  drop i; 
  run ;
* calculate population density; 
proc sql;
  create table pop_total
  as select distinct a.*, b.land
  from pop_total as a, land as b
  where a.FIPS = b.FIPS and a.year = b.year;
  quit; 
* Calculate average age by FIPS-Year;
proc sql;
  create table mean_age
  as select FIPS, YEAR, sum(age*population)/sum(population) as mean_age
  from allyears (where = (AgeGrp ne 0))
  group by FIPS, YEAR;
  quit; 
proc sort data = mean_age; by FIPS YEAR; run ;

* Calculate retire ratio by FIPS-YEAR;
proc sort data = allyears; by FIPS YEAR AGEGRP; run; 
data retire_ratio;
  set allyears;
  by FIPS YEAR;
  retain ctypop rtrpop; 
  if AGEGRP = 0 then ctypop = population; 
  if AGEGRP = 14 then rtrpop = population; 
  if AGEGRP > 14 then rtrpop = population + rtrpop;
  if AGEGRP = 18 then retire_ratio = rtrpop/ctypop;
  KEEP FIPS YEAR retire_ratio;
  if AGEGRP = 18; 
  run ;
proc sort data = retire_ratio; by FIPS YEAR; run ;


/*********************************************************************/
/********************** Step 2: Education*****************************/


* Education attainment by counties are available for American Communicty 
  Survey (ACS) since 2005 and Census 2000 long form;
* However, not all counties are incldued. ACS 1-year survey covers the 
  smallest sample whereas 5-year survey covers all counties; 
* At the time of this program, 5-year survey are available for 2009-2016.
  The plan is to use Census 2000's data for 1996-2008 and 5-year survey 
  for 2009-2016. For 2017, use 2016's data;
* The table to use is B15002 5-year survey, downloaded from American Fact
  Finder manually; 
* Note: the universe is population above 25 years old;
*import; 
%macro edu;
%do i = 1 %to 8;
%let year = %sysfunc(putn(%eval(&i+8),z2.));
PROC IMPORT OUT= WORK.edu&i
            DATAFILE= "C:\Users\User\Dropbox\SASLib\Thesis\census\ACS_&year._5YR_B15002_with_ann.csv" 
            DBMS=CSV REPLACE;
     GETNAMES=YES;
     DATAROW=2; 
RUN; 
data edu&i;
  set edu&i;
  year = 2008+&i;
  drop HD02: GEO_id GEO_display_label;
  rename GEO_id2 = FIPS;
  run ;
%end;

%mend; 
%edu; 
* Calculate ratios and mean edu in years;
data eduall;
  set edu1-edu8;
  array duration{16} d1-d16 (0 2 5.5 7.5 9 10 11 12 12 12.5 13.5 14 16 18 18 22);
  array edu_M{16} HD01_VD03-HD01_VD18;
  array edu_F{16} HD01_VD20-HD01_VD35;
  array ed{16} ed1-ed16;
  do i = 1 to 16;
    ed[i] = (edu_M[i]+edu_F[i])*duration[i];
  end; 
  mean_edu = sum(of ed1-ed16)/HD01_VD01;
  ratio_clgplus = (sum(of HD01_VD15-HD01_VD18)+sum(of HD01_VD32-HD01_VD35))/HD01_VD01;
  keep FIPS YEAR mean_edu ratio_clgplus;
  run ;
data edu;
  set eduall county2000 (keep=FIPS YEAR mean_edu ratio_clgplus);
  run; 
proc sort data = edu; by FIPS YEAR; run ;
/** Use year 2000's data to fill 1990-1999 and 2001-2004;*/
/** Use year 2009's data to fill 2005-2008;*/
/** Use year 2016's data to fill 2017;*/
/*data edu;*/
/*  set edu9;*/
/*  output;*/
/*  if year = 2000 then do;*/
/*    do i = 1 to 10;*/
/*      year = 1989+i;*/
/*      output;*/
/*    end; */
/*    do i = 1 to 4;*/
/*      year = 2000+i;*/
/*      output;*/
/*    end; */
/*  end; */
/*  if year = 2009 then do;*/
/*    do i = 1 to 4;*/
/*      year = 2004+i;*/
/*      output;*/
/*    end; */
/*  end; */
/*  if year = 2016 then do;*/
/*    do i = 1 to 1;*/
/*      year = 2017;*/
/*      output; */
/*    end; */
/*  end; */
/*  drop i;*/
/*  run; */
/** Alternatively, use sas extrapolation to fill the gap;*/
/** Create time-series with missing values; */
/*data edu_missing;*/
/*  set edu9;*/
/*  output;*/
/*  if year = 2000 then do;*/
/*    do i = 1 to 10;*/
/*      year = 1989+i;*/
/*      mean_edu = .;*/
/*      ratio_clgplus = .;*/
/*      output;*/
/*    end; */
/*    do i = 1 to 4;*/
/*      year = 2000+i;*/
/*      mean_edu = .;*/
/*      ratio_clgplus = .;*/
/*      output;*/
/*    end; */
/*  end; */
/*  if year = 2009 then do;*/
/*    do i = 1 to 4;*/
/*      year = 2004+i;*/
/*      mean_edu = .;*/
/*      ratio_clgplus = .;*/
/*      output;*/
/*    end; */
/*  end; */
/*  if year = 2016 then do;*/
/*    do i = 1 to 1;*/
/*      year = 2017;*/
/*      mean_edu = .;*/
/*      ratio_clgplus = .;*/
/*      output; */
/*    end; */
/*  end; */
/*  drop i;*/
/*  run; */
/*proc sort data = edu_missing; by FIPS year; run; */
/*proc timeseries data=edu_missing out=test;*/
/*  id date interval=year start='01jan2017'd;*/
/*  var ym_yld rttm_int / setmissing=missing;*/
/*run;*/
/*proc expand data=edu_missing  out=edu_ex extrapolate;*/
/*  by FIPS;*/
/*  id year;*/
/*  convert mean_edu=edu_ext / method=spline(natural);*/
/*run;*/
/*********************************************************************/
/************** Step 3: Household Moved in Year**********************/

* The moved-in date is again availabe for census 2000 H38 table and 
  ASC 5-year Survey B25038 table for 2009-2016;
* Change: use the median mvoed-in date as the variable:
  H39 table for 2000
  B25039 table for 2009-2017
  Files downloaded from American Fact Finder Guided Search;
* Import the files;
%macro movedin;
PROC IMPORT OUT= WORK.movedin0
            DATAFILE= "C:\Users\User\Dropbox\SASLib\Thesis\census\DEC_00_SF3_H039_with_ann.csv" 
            DBMS=CSV REPLACE;
     GETNAMES=YES;
     DATAROW=2; 
RUN; 
data movedin0;
  set movedin0;
  rename geo_id2 = FIPS;
  year = 2000;
  movedin = year - VD02;
  keep geo_id2 year movedin; 
  run;
%do i = 1 %to 8; 
%let year = %sysfunc(putn(%eval(&i+8),z2.));
PROC IMPORT OUT= WORK.movedin&i
            DATAFILE= "C:\Users\User\Dropbox\SASLib\Thesis\census\ACS_&year._5YR_B25039_with_ann.csv" 
            DBMS=CSV REPLACE;
     GETNAMES=YES;
     DATAROW=2; 
RUN; 
data movedin&i;
  set movedin&i;
  rename geo_id2 = FIPS;
  year = 2008+&i;
  movedin = year - HD01_VD02;
  keep geo_id2 year movedin; 
  run; 
%end; 
%mend; 
%movedin; 
*Combine; 
data movedin;
  set movedin0-movedin8;
  run; 
proc sort data = movedin; by FIPS YEAR; run ;



/*********************************************************************/
/************** Step 4: Median Household Income **********************/

* Data are downloaded from Census SAIPE from 1997-2016; 
PROC IMPORT OUT= WORK.income0
            DATAFILE= "C:\Users\User\Dropbox\SASLib\Thesis\census\median_income_97_16.csv" 
            DBMS=CSV REPLACE;
     GETNAMES=YES;
     DATAROW=2; 
RUN; 
data income;
  set income0 (rename = (median_income = median_incomec));
  rename county_id = FIPS;
  median_income = input(median_incomec,dollar9.);
  keep county_id year median_income; 
  run; 
proc sort data = income; by FIPS year; run; 



/*********************************************************************/
/*********************** Step 5: Industry ****************************/

* Industry information are provided by table C24030 in ACS 5-year survey
  from 2009-2016;
* Import files; 
%macro ind;
%do i = 1 %to 8; 
%let year = %sysfunc(putn(%eval(&i+8),z2.));
PROC IMPORT OUT= WORK.ind&i
            DATAFILE= "C:\Users\User\Dropbox\SASLib\Thesis\census\ACS_&year._5YR_C24030_with_ann.csv" 
            DBMS=CSV REPLACE;
     GETNAMES=YES;
     DATAROW=3; 
RUN; 
data ind&i;
  set ind&i;
  year = 2008+&i; 
  array ind_male [27] HD01_VD02-HD01_VD28; 
  array ind_female [27] HD01_VD29-HD01_VD55; 
  do i = 1 to 27;
    ind_male[i] = ind_male[i] + ind_female[i];
  end;
  rename geo_id2 = FIPS
        HD01_VD01 = nasci00 /*total*/
        HD01_VD04 = nasci11 /*11*/
        HD01_VD05 = nasci21 /*21*/
        HD01_VD12 = nasci22 /*22*/
        HD01_VD06 = nasci23 /*23*/
        HD01_VD07 = nasci31 /*31-33*/
        HD01_VD08 = nasci42 /*42*/
        HD01_VD09 = nasci44 /*44-45*/
        HD01_VD11 = nasci48 /*48-49*/
        HD01_VD13 = nasci51 /*51*/
        HD01_VD15 = nasci52 /*52*/
        HD01_VD16 = nasci53 /*53*/
        HD01_VD22 = nasci61 /*61*/
        HD01_VD23 = nasci62 /*62*/
        HD01_VD25 = nasci71 /*71*/
        HD01_VD26 = nasci72 /*72*/
        HD01_VD27 = nasci81 /*81*/
        HD01_VD28 = nasci92  /*92*/;
  run ;   
data ind&i;
  set ind&i;
 
  keep FIPS YEAR NASCI:;
  run; 
%end; 
%mend; 
%ind; 

* Combine;
data ind;
  set ind1-ind8 county2000 (keep=FIPS YEAR nasci:);
    nasci00_ratio = nasci00/nasci00;
    nasci11_ratio = nasci11/nasci00;
    nasci21_ratio = nasci21/nasci00;
    nasci22_ratio = nasci22/nasci00;
    nasci23_ratio = nasci23/nasci00;
    nasci31_ratio = nasci31/nasci00;
    nasci42_ratio = nasci42/nasci00;
    nasci44_ratio = nasci44/nasci00;
    nasci48_ratio = nasci48/nasci00;
    nasci51_ratio = nasci51/nasci00;
    nasci52_ratio = nasci52/nasci00;
    nasci53_ratio = nasci53/nasci00;
    nasci61_ratio = nasci61/nasci00;
    nasci62_ratio = nasci62/nasci00;
    nasci71_ratio = nasci71/nasci00;
    nasci72_ratio = nasci72/nasci00;
    nasci81_ratio = nasci81/nasci00;
    nasci92_ratio = nasci92/nasci00;    
  run; 
proc sort data = ind; by FIPS YEAR; run ;


/*********************************************************************/
/*********************************************************************/
/*********************************************************************/
* Combine all files and extrapolate; 
* work.edu (mean_edu and ratio_clgplus), 
  work.ind (nasci),
  work.movedin (movedin)
  are all from 2000 and 2009-2016. Need to extrapolate to 1990-2017;
* work.income (median_income) from 1997-2016;
* work.mean_age
  work.ratio_retire are from 1990-2017;
* 1. Combine work.edu, work.movedin, and work.ind; 
data merge0;
  merge edu
        ind
        movedin
        ;
  by FIPS Year;
  run; 

* 1.1 create year from 1990-2017 with missing gaps;
* Create time-series with missing values; 
data merge1;
  set merge0;
  array var [*] mean_edu--movedin;
  output;
  if year = 2000 then do;
    do i = 1 to 10;
      year = 1989+i;
      * set all other varaibles to missing; 
      do j = 1 to dim(var);
        var[j] = . ;
      end; 
      output;
    end; 
    do i = 1 to 4;
      year = 2000+i;
      do j = 1 to dim(var);
        var[j] = . ;
      end; 
      output;
    end; 
  end; 
  if year = 2009 then do;
    do i = 1 to 4;
      year = 2004+i;
      do j = 1 to dim(var);
        var[j] = . ;
      end; 
      output;
    end; 
  end; 
  if year = 2016 then do;
    do i = 1 to 1;
      year = 2017;
      do j = 1 to dim(var);
        var[j] = . ;
      end; 
      output; 
    end; 
  end; 
  drop i j;
  run; 
proc sort data = merge1; by FIPS year; run; 
proc expand data=merge1  out=merge2 extrapolate;
  by FIPS;
  id year;
  convert mean_edu--movedin / method=spline(natural);
run;
* 2. Extrapolate work.income;
data merge3;
  set income;
  array var [*] median_income;
  output;
  if year = 1997 then do;
    do i = 1 to 7;
      year = 1989+i;
      * set all other varaibles to missing; 
      do j = 1 to dim(var);
        var[j] = . ;
      end; 
      output;
    end; 
  end;
  if year = 2016 then do;
    do i = 1 to 1;
      year = 2017;
      do j = 1 to dim(var);
        var[j] = . ;
      end; 
      output; 
    end; 
  end; 
  drop i j;
  run; 
proc sort data = merge3; by FIPS year; run; 
proc expand data=merge3  out=merge4 extrapolate;
  by FIPS;
  id year;
  convert median_income / method=spline(natural);
run;
* 3. combine merge2, merge4, mean_age, and ratio_retire;
data TEST0;
  merge mean_age
        retire_ratio
        merge4
        merge2
        ;
  run; 
* 4. Add suffix to all extrapolated variables;
proc sql;
  select cat(name, ' = ', cats(name, '_ctex' )) 
  into :renstr separated 
  by ' ' 
  from dictionary.columns 
  where libname = 'WORK' and memname='TEST0';
quit;

data county_ctrl_extpl;
  set TEST0 (rename = (&renstr));
  * rename FIPS YEAR mean_age retire_ratio back;
  rename FIPS_ctex = FIPS
         YEAR_ctex = YEAR
         mean_age_ctex = mean_age_ct
         retire_ratio_ctex = retire_ratio_ct
         ;
run;

* 5. Since sas extrapolation are not always reliable;
* Adjust work.edu, work.movedin, and work.ind with following; 
  * Use year 2000's data to fill 1990-1999 and 2001-2004;
  * Use year 2009's data to fill 2005-2008;
  * Use year 2016's data to fill 2017;
* Adjust work.income with the following ;
  * Use year 1997's data to fill 1990-1996; 
   * Use year 2016's data to fill 2017;
data fill0;
  set merge0;
  array var [*] mean_edu--movedin;
  output;
  if year = 2000 then do;
    do i = 1 to 10;
      year = 1989+i;
      output;
    end; 
    do i = 1 to 4;
      year = 2000+i;
      output;
    end; 
  end; 
  if year = 2009 then do;
    do i = 1 to 4;
      year = 2004+i;
      output;
    end; 
  end; 
  if year = 2016 then do;
    do i = 1 to 1;
      year = 2017;
      output; 
    end; 
  end; 
  drop i;
  run; 
proc sort data = fill0; by FIPS year; run; 

data fill1;
  set income;
  output;
  if year = 1997 then do;
    do i = 1 to 7;
      year = 1989+i;
      output;
    end; 
  end;
  if year = 2016 then do;
    do i = 1 to 1;
      year = 2017;
      output; 
    end; 
  end; 
  drop i;
  run; 
proc sort data = fill1; by FIPS year; run; 

data TEST1;
  merge mean_age
        retire_ratio
        fill1
        fill0
        ;
  run; 
* Add suffix bkfl;
proc sql;
  select cat(name, ' = ', cats(name, '_ctbk' )) 
  into :renstr separated 
  by ' ' 
  from dictionary.columns 
  where libname = 'WORK' and memname='TEST0';
quit;

data county_ctrl_bkfl;
  set TEST1 (rename = (&renstr));
  * rename FIPS YEAR mean_age retire_ratio back;
  rename FIPS_ctbk = FIPS
         YEAR_ctbk = YEAR
         mean_age_ctbk = mean_age_ct
         retire_ratio_ctbk = retire_ratio_ct
         ;
run;

* Save and output to rep lib;
data rep.county_ctrl_2018;
  merge county_ctrl_bkfl 
        county_ctrl_extpl
        pop_total
        ;
  by FIPS YEAR;
  run ;
* Note: Puerto Rico counties with FIPS starting with 7 are missing for 
  mean_age and retire_ratio;  

